* Start from an empty session (no data, no stored matrices) and disable
* output paging so the script runs without pausing.
clear
clear matrix
set more off

** Elliott
**cd "Z:\home\elliott\Dropbox\1Spring2016\TUP\data\"

* Load the raw baseline survey; the path is relative to the working directory.
use "Baseline\Tup programme sudan single file.dta"

* Rename some clusters
* "HAI DAM 1 SOUTH" is folded into "HAI DAM 1"; the cluster names are then
* numbered 1-7 in the order listed. Any other value of clus leaves cluster
* missing.
    replace clus="HAI DAM 1" if clus=="HAI DAM 1 SOUTH"
    gen cluster = .
    local code = 0
    foreach name in "HAI BANANA" "HAI CITY" "HAI DAM 1" "HAI DAM 2" "HAI DAM NORTH" "HAI PEACE" "HAI SOPIRI" {
        local ++code
        replace cluster = `code' if clus=="`name'"
    }

** Drop unused variables. Comment out the keep line below to keep all.

keep idno cluster  s4* s6_* s7* s8_* s9_* s12_* s16* s17* s18* s19* s20*

** Rename Asset Variables
    * Land questions s8_1 .. s8_5, in questionnaire order.
    local k = 0
    foreach kind in owncult ownnocult ownrent rentcult communitycult {
        local ++k
        rename s8_`k' land_`kind'
    }

    * Asset questions s9_1 .. s9_20: the "b" item becomes asset_val_<name> and
    * the "a" item becomes asset_n_<name> (presumably value and count; confirm
    * against the questionnaire).
    local k = 0
    foreach asset in house homestead cows smallanimals poultry plough shed shop radio tv fan mobile chairtables bed bicycle carts sewing net netITN motorcycle {
        local ++k
        rename s9_`k'b asset_val_`asset'
        rename s9_`k'a asset_n_`asset'
    }

    * Savings questions s12_1 .. s12_5.
    local k = 0
    foreach place in home bank BRAC NGOs other {
        local ++k
        rename s12_`k' savings_`place'
    }

** Access to services

    * Questions s6_1 .. s6_5.
    local k = 0
    foreach service in healthcenter loans enrollchild foodtransfer coops {
        local ++k
        rename s6_`k' access_`service'
    }

** Transfers

    * The two summary items are renamed first, then the three numbered "d"
    * items on each side (s7_1Xd -> get, s7_2Xd -> give).
    rename s7 transfers_any_get
    rename s7_2 transfers_any_give
    forvalues j = 1/3 {
        rename s7_1`j'd transfers_get`j'
        rename s7_2`j'd transfers_give`j'
    }

* Checkpoint: write the renamed baseline data to disk.
save Baseline\TUP_baseline, replace

** Merge in de-identified and cleaned income data
    * One-to-one match on idno. The match indicator is kept under its own
    * name so that later merges can create a fresh _merge.
    merge 1:1 idno using Baseline\incomes
    rename _merge merge_income
    * Overwrite the checkpoint file with the income-augmented data.
    save Baseline\TUP_baseline, replace

** Runs with just household roster.dta
** Builds household-level composition counts from the roster (presumably one
** row per household member; confirm), collapses to one row per idno, and
** merges the result onto the baseline checkpoint file.

    use "Baseline\household roster", clear

    * s14 is used as age and s13 as the sex indicator.
    * NOTE(review): the sex coding below appears internally inconsistent:
    * girls and men both use s13==0, while boys and women both use s13==1.
    * If s13 is a plain sex code, one of the two pairs is swapped. The correct
    * coding cannot be determined from this file, so the conditions are left
    * as written; confirm against the questionnaire or codebook.
    gen ischild = s14<=18
    gen ischild2 = s14<=15
    gen issmallchild = s14<5
    gen girls    = s14<=15 & s13==0
    gen boys     = (s14<=15 & s13==1)
    * Stata orders numeric missing above every number, so a bare s14>15 is
    * true when age is missing. Exclude missing ages explicitly so members
    * with unknown age are not counted as adults.
    gen men      = (s14>15 & !missing(s14) & s13==0)
    gen women    = (s14>15 & !missing(s14) & s13==1)

    * Age of the person on roster line 1 only (presumably the respondent or
    * household head; confirm). All other rows stay missing.
    gen age=.
    replace age=s14 if ln==1

    * Most frequent s17 value within the household; egen mode() returns
    * missing when there is a tie.
    bysort idno: egen tribe=mode(s17)

    * age uses (max) rather than (sum): the sum of an all-missing group is 0,
    * which would report age 0 for households with no line-1 row or a missing
    * line-1 age, and would add ages together if line 1 were duplicated.
    * With (max) those households get a missing age instead.
    collapse (max) hh_size=ln age (sum) girls boys men women child_total=ischild children=ischild2 smallchildren=issmallchild (mean) tribe, by(idno)

    merge 1:1 idno using Baseline\TUP_baseline
    tab _merge
    rename _merge merge_HH

    ** (Not making aggregate variables within the cleaning files)

** Rename variables to match the Midline variable names. The key for all of these is 'varmap.csv': the spreadsheet that takes raw data variable
** names and gives them meaningful variable names in baseline and midline.
    rename s4 in_business
    * rename ln line_no
    rename s4_1b business_type
    label var business_type "s4_1b from baseline"

    * Meals: blank s18 answers are set to zero before the row statistics are
    * taken, so the row means below include those zeros.
    foreach meal of varlist s18* {
        replace `meal' = 0 if `meal' == .
    }
    egen served_3days = rowtotal(s18*)

    egen adult_meals = rowmean(s18b_*)
    egen child_meals = rowmean(s18a_*)
    egen all_meals = rowmean(s18*)

    * Food consumption items s19a_1 .. s19a_14.
    local j = 0
    foreach food in cereals beans oil salt sugar meat fish egg milk vegetables fruit spices alcohol otherfood {
        local ++j
        rename s19a_`j' c_`food'
    }
    label var c_cereals "s19a_1: maize, potato, millet, etc."
    label var c_meat "s19a_6: includes chicken"

    * Non-food items with suffix m, s20_1m .. s20_5m (presumably monthly
    * recall; confirm).
    local j = 0
    foreach good in fuel cosmetics soap transport entertainment {
        local ++j
        rename s20_`j'm c_`good'
    }

    * Non-food items with suffix y, s20_1y .. s20_8y (presumably yearly
    * recall; confirm).
    local j = 0
    foreach good in clothesfootwear utensils furniture textiles ceremonies charities dowry other {
        local ++j
        rename s20_`j'y c_`good'
    }

    * Shock questions: item 2 of each shock becomes _ssp, item 3 becomes _cope.
    foreach pair in "a illness" "c theft" "d livestockdeath" {
        local code : word 1 of `pair'
        local shock : word 2 of `pair'
        rename s16_`code'2 shock_`shock'_ssp
        rename s16_`code'3 shock_`shock'_cope
    }

    * Food-security questions s17_1 .. s17_9.
    local j = 0
    foreach q in worried notpreferred fewkinds preferrednot portions fewmeals nofood hungry wholeday {
        local ++j
        rename s17_`j' fs_`q'
    }
save Baseline\TUP_baseline, replace


****             NOW MERGE CENSUS        ****
**** Recently moved from census_merge.do ****
****      to inside baseline_cleanup     ****

use census\Census_database.dta, clear
* Respondent ID is called 'idno' in every file
rename resp_id idno

sort idno
merge 1:1 idno using Baseline\TUP_baseline
tab _merge

* Keep only households present in both the census and the baseline data.
keep if _merge==3
rename _merge merge_census

* Merge in Treatment Variables
* Rows that appear only in Tgroups are dropped; rows without an assignment
* are kept.
merge 1:1 idno using Tgroups
drop if _merge==2
rename _merge merge_assignment
sort idno

* Append _b to every variable except the identifier (presumably marking the
* baseline round; confirm). idno is skipped inside the loop instead of being
* renamed to idno_b and then restored with "rename idno idno": that form only
* works when Stata resolves idno as an abbreviation of idno_b, so it fails
* when varabbrev is off or when another variable name starts with idno.
foreach variable of varlist * {
    if "`variable'" != "idno" {
        rename `variable' `variable'_b
    }
}

****************************************
    * Clean-up all expenditure & asset data:
    * Save raw values * Set blanks to zero
    * Current Top-code Method: 99th percentile
****************************************

* For every c_* and asset_val* variable:
*   1. keep an untouched copy in raw_<name>;
*   2. set missing values to zero;
*   3. cap values above the 99th percentile at the 99th percentile. The
*      percentile is computed after the zero fill, so the zeros count.
* missing() is used rather than comparison with "." because "== ." matches
* only the system missing value. Extended missings (.a-.z) would otherwise
* skip the zero fill, compare greater than r(p99), pass the "!= ." guard,
* and be overwritten with the 99th percentile.
foreach item of varlist c_* asset_val* {
    gen raw_`item' = `item'
    replace `item' = 0 if missing(`item')
    quietly summarize `item', detail
    replace `item' = r(p99) if `item' > r(p99) & !missing(`item')
}


* Write the final baseline file and a comma-separated copy.
save Baseline\TUP_baseline, replace
outsheet using csv\TUP_baseline.csv, c replace

